# Load packages
library(ggplot2)
library(dplyr)
library(lme4)
library(tm)
library(proxy)
library(stargazer)

# Fix the random seed so any stochastic step is reproducible
set.seed(1996)
#############################
# Clear the workspace, keeping only the three objects named here (if present)
rm(list = setdiff(ls(), c('script', 'scripts', 'log_file')))
#############################
# Load the incumbent/challenger data frame
master <- readRDS('incumbent_challengerdf.rds')

# Create IV (WEB): start from challenger_position_web_1, relabel rows where
# candvotes == 1 as 'No Challenger', then fix the level order so that
# 'No Challenger' is the first (reference) level.
# NOTE(review): presumably candvotes == 1 marks an uncontested race -- confirm.
master$challenger_factor_web <- factor(
  replace(master$challenger_position_web_1, master$candvotes == 1, 'No Challenger'),
  levels = c('No Challenger', 'Moderate', 'Extreme')
)

# Paragraph-level issue text
paragraphs <- readRDS('paragraphs_cleaned.rds')
# Paragraph-level scores: keep only the merge key and the gpt_score column
paragraphs_scores <- readRDS('paragraphs_withgpt.rds')
paragraphs_scores <- paragraphs_scores[, c('paragraph_id', 'gpt_score')]
# Attach gpt_score to the paragraphs; merge() keeps only paragraph_ids
# that appear in both inputs
paragraphs <- merge(x = paragraphs, y = paragraphs_scores, by = 'paragraph_id')

# Create Issue Variables
# One numeric column per policy area on `master`. Each is filled row by row
# below; rows that are skipped (missing web_score) keep NA.
issue_vars <- c('abortion', 'education', 'energy', 'environment_climate',
                'guns', 'healthcare', 'immigration')
for (issue in issue_vars) {
  master[[issue]] <- NA_real_
}

# Loop over the rows of `master` (one row per candidate-district-year).
# The index i addresses `master` throughout, so the loop must be bounded by
# nrow(master); bounding it by nrow(paragraphs) silently leaves trailing rows
# unscored whenever `paragraphs` has fewer rows than `master`.
for (i in seq_len(nrow(master))) {
  # No web score for this row: leave all issue scores as NA
  if (is.na(master$web_score[i])) {
    next
  }
  # Subset to this candidate's paragraphs (matched on candidate, state,
  # district and year)
  temp <- subset(paragraphs, candidate == master$candidate[i] &
                   state_postal == master$state_postal[i] &
                   cd == master$cd[i] &
                   year == master$year[i])

  # Score for each policy area = mean gpt_score of the paragraphs flagged
  # (== 1) for that area. which() drops rows whose flag is NA, matching
  # subset(). With no flagged paragraphs the mean is NaN.
  for (issue in issue_vars) {
    flagged <- which(temp[[issue]] == 1)
    master[[issue]][i] <- mean(temp$gpt_score[flagged], na.rm = TRUE)
  }
}

# Flip Scores for Democrats to create consistent interpretation, then rescale.
#
# For one issue score vector:
#   * rows with party == 'Democrat' are centered on the Democrat mean and
#     multiplied by -1;
#   * all other rows are centered on the Republican mean;
#   * rows with a missing party become NA;
#   * the result is passed through scale() to have mean zero and sd one
#     (scale() returns a one-column matrix carrying the centering/scaling
#     attributes).
flip_center_scale <- function(score, party) {
  is_democrat <- party == 'Democrat'
  democrat_mean <- mean(score[which(is_democrat)], na.rm = TRUE)
  republican_mean <- mean(score[which(party == 'Republican')], na.rm = TRUE)
  scale(ifelse(is_democrat,
               -1 * (score - democrat_mean),
               score - republican_mean))
}

# Apply the same transformation to every issue column in turn
for (issue_col in c('abortion', 'education', 'energy', 'environment_climate',
                    'guns', 'healthcare', 'immigration')) {
  master[[issue_col]] <- flip_center_scale(master[[issue_col]], master$cand_party)
}


#############################
# Table I2: Issue Positioning Response
#############################
# Every model below is estimated on the same sample: all rows of `master`
# except those with year == 2022.
pre_2022 <- subset(master, year != 2022)

# The m3_* models are OLS regressions of an issue score on the challenger
# factor, with year and FECCandID entered as factor (dummy) terms.

# Abortion
m3_abortion <- lm(
  abortion ~ challenger_factor_web + as.factor(year) + as.factor(FECCandID),
  data = pre_2022
)

# Education
m3_education <- lm(
  education ~ challenger_factor_web + as.factor(year) + as.factor(FECCandID),
  data = pre_2022
)

# Energy, mixed model with a random intercept per FECCandID.
# This model is not passed to either stargazer() call below.
m1_energy <- lmer(
  energy ~ challenger_factor_web + as.factor(year) + (1|FECCandID),
  data = pre_2022
)

# Energy
m3_energy <- lm(
  energy ~ challenger_factor_web + as.factor(year) + as.factor(FECCandID),
  data = pre_2022
)

# Environment / climate
m3_environment_climate <- lm(
  environment_climate ~ challenger_factor_web + as.factor(year) + as.factor(FECCandID),
  data = pre_2022
)

# Guns
m3_guns <- lm(
  guns ~ challenger_factor_web + as.factor(year) + as.factor(FECCandID),
  data = pre_2022
)

# Healthcare
m3_healthcare <- lm(
  healthcare ~ challenger_factor_web + as.factor(year) + as.factor(FECCandID),
  data = pre_2022
)

# Immigration
m3_immigration <- lm(
  immigration ~ challenger_factor_web + as.factor(year) + as.factor(FECCandID),
  data = pre_2022
)

# Print the regression tables in two parts; `keep` limits the displayed
# coefficients to the challenger terms and the constant.
# NOTE(review): the section header says "Table I2" but the original note here
# said "Appendix Table 11" -- confirm the table numbering.
stargazer(
  m3_abortion, m3_education, m3_energy, m3_environment_climate,
  keep = c('challenger_factor_web', 'Constant')
)

stargazer(
  m3_guns, m3_healthcare, m3_immigration,
  keep = c('challenger_factor_web', 'Constant')
)





